[SLPVectorizer] Test -1 stride loads. #158358
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-risc-v

Author: Mikhail Gudim (mgudim)

Changes

Add a test that generates a -1 stride load, along with flags to force this behavior. A reduced illustrative sketch follows the full diff.

Full diff: https://github.com/llvm/llvm-project/pull/158358.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 1cfcd3ffbd664..14a4e1f1efc82 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -198,6 +198,16 @@ static cl::opt<unsigned> MaxProfitableLoadStride(
"slp-max-stride", cl::init(8), cl::Hidden,
cl::desc("The maximum stride, considered to be profitable."));
+static cl::opt<bool>
+ DisableTreeReorder("slp-disable-tree-reorder", cl::init(false), cl::Hidden,
+ cl::desc("Disable tree reordering even if it is "
+ "profitable. Used for testing only."));
+
+static cl::opt<bool>
+ ForceStridedLoads("slp-force-strided-loads", cl::init(false), cl::Hidden,
+ cl::desc("Generate strided loads even if they are not "
+ "profitable. Used for testing only."));
+
static cl::opt<bool>
ViewSLPTree("view-slp-tree", cl::Hidden,
cl::desc("Display the SLP trees with Graphviz"));
@@ -7770,6 +7780,9 @@ static void combineOrders(MutableArrayRef<unsigned> Order,
}
bool BoUpSLP::isProfitableToReorder() const {
+ if (DisableTreeReorder)
+ return false;
+
constexpr unsigned TinyVF = 2;
constexpr unsigned TinyTree = 10;
constexpr unsigned PhiOpsLimit = 12;
@@ -13030,7 +13043,7 @@ void BoUpSLP::transformNodes() {
InstructionCost StridedCost = TTI->getStridedMemoryOpCost(
Instruction::Load, VecTy, BaseLI->getPointerOperand(),
/*VariableMask=*/false, CommonAlignment, CostKind, BaseLI);
- if (StridedCost < OriginalVecCost)
+ if (StridedCost < OriginalVecCost || ForceStridedLoads)
// Strided load is more profitable than consecutive load + reverse -
// transform the node to strided load.
E.State = TreeEntry::StridedVectorize;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll
new file mode 100644
index 0000000000000..77d3ac1fb2322
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-load.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=riscv64 -mattr=+m,+v \
+; RUN: -passes=slp-vectorizer \
+; RUN: -slp-disable-tree-reorder=true -slp-force-strided-loads=true \
+; RUN: -S < %s | FileCheck %s
+
+define void @const_stride_reversed(ptr %pl, ptr %ps) {
+; CHECK-LABEL: define void @const_stride_reversed(
+; CHECK-SAME: ptr [[PL:%.*]], ptr [[PS:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[GEP_L15:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 15
+; CHECK-NEXT: [[GEP_S0:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr align 16 [[GEP_L15]], i64 -1, <16 x i1> splat (i1 true), i32 16)
+; CHECK-NEXT: store <16 x i8> [[TMP2]], ptr [[GEP_S0]], align 16
+; CHECK-NEXT: ret void
+;
+ %gep_l0 = getelementptr inbounds i8, ptr %pl, i64 0
+ %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
+ %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
+ %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
+ %gep_l4 = getelementptr inbounds i8, ptr %pl, i64 4
+ %gep_l5 = getelementptr inbounds i8, ptr %pl, i64 5
+ %gep_l6 = getelementptr inbounds i8, ptr %pl, i64 6
+ %gep_l7 = getelementptr inbounds i8, ptr %pl, i64 7
+ %gep_l8 = getelementptr inbounds i8, ptr %pl, i64 8
+ %gep_l9 = getelementptr inbounds i8, ptr %pl, i64 9
+ %gep_l10 = getelementptr inbounds i8, ptr %pl, i64 10
+ %gep_l11 = getelementptr inbounds i8, ptr %pl, i64 11
+ %gep_l12 = getelementptr inbounds i8, ptr %pl, i64 12
+ %gep_l13 = getelementptr inbounds i8, ptr %pl, i64 13
+ %gep_l14 = getelementptr inbounds i8, ptr %pl, i64 14
+ %gep_l15 = getelementptr inbounds i8, ptr %pl, i64 15
+
+ %load0 = load i8, ptr %gep_l0 , align 16
+ %load1 = load i8, ptr %gep_l1 , align 16
+ %load2 = load i8, ptr %gep_l2 , align 16
+ %load3 = load i8, ptr %gep_l3 , align 16
+ %load4 = load i8, ptr %gep_l4 , align 16
+ %load5 = load i8, ptr %gep_l5 , align 16
+ %load6 = load i8, ptr %gep_l6 , align 16
+ %load7 = load i8, ptr %gep_l7 , align 16
+ %load8 = load i8, ptr %gep_l8 , align 16
+ %load9 = load i8, ptr %gep_l9 , align 16
+ %load10 = load i8, ptr %gep_l10, align 16
+ %load11 = load i8, ptr %gep_l11, align 16
+ %load12 = load i8, ptr %gep_l12, align 16
+ %load13 = load i8, ptr %gep_l13, align 16
+ %load14 = load i8, ptr %gep_l14, align 16
+ %load15 = load i8, ptr %gep_l15, align 16
+
+ %gep_s0 = getelementptr inbounds i8, ptr %ps, i64 0
+ %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
+ %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
+ %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
+ %gep_s4 = getelementptr inbounds i8, ptr %ps, i64 4
+ %gep_s5 = getelementptr inbounds i8, ptr %ps, i64 5
+ %gep_s6 = getelementptr inbounds i8, ptr %ps, i64 6
+ %gep_s7 = getelementptr inbounds i8, ptr %ps, i64 7
+ %gep_s8 = getelementptr inbounds i8, ptr %ps, i64 8
+ %gep_s9 = getelementptr inbounds i8, ptr %ps, i64 9
+ %gep_s10 = getelementptr inbounds i8, ptr %ps, i64 10
+ %gep_s11 = getelementptr inbounds i8, ptr %ps, i64 11
+ %gep_s12 = getelementptr inbounds i8, ptr %ps, i64 12
+ %gep_s13 = getelementptr inbounds i8, ptr %ps, i64 13
+ %gep_s14 = getelementptr inbounds i8, ptr %ps, i64 14
+ %gep_s15 = getelementptr inbounds i8, ptr %ps, i64 15
+
+ store i8 %load0, ptr %gep_s15, align 16
+ store i8 %load1, ptr %gep_s14, align 16
+ store i8 %load2, ptr %gep_s13, align 16
+ store i8 %load3, ptr %gep_s12, align 16
+ store i8 %load4, ptr %gep_s11, align 16
+ store i8 %load5, ptr %gep_s10, align 16
+ store i8 %load6, ptr %gep_s9, align 16
+ store i8 %load7, ptr %gep_s8, align 16
+ store i8 %load8, ptr %gep_s7, align 16
+ store i8 %load9, ptr %gep_s6, align 16
+ store i8 %load10, ptr %gep_s5, align 16
+ store i8 %load11, ptr %gep_s4, align 16
+ store i8 %load12, ptr %gep_s3, align 16
+ store i8 %load13, ptr %gep_s2, align 16
+ store i8 %load14, ptr %gep_s1, align 16
+ store i8 %load15, ptr %gep_s0, align 16
+
+ ret void
+}
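
For readers skimming the diff, here is a reduced 4-lane sketch of the pattern the test exercises. This is illustrative only and not part of the patch: the function name @four_lane_sketch and the reduced width are assumptions. Consecutive loads consumed by stores in reverse order are expected, under -slp-disable-tree-reorder and -slp-force-strided-loads, to become a single stride -1 llvm.experimental.vp.strided.load anchored at the last element, matching the CHECK lines above.

; Reduced sketch (hypothetical): the same pattern as the test, at width 4.
define void @four_lane_sketch(ptr %pl, ptr %ps) {
  %gep_l1 = getelementptr inbounds i8, ptr %pl, i64 1
  %gep_l2 = getelementptr inbounds i8, ptr %pl, i64 2
  %gep_l3 = getelementptr inbounds i8, ptr %pl, i64 3
  %load0 = load i8, ptr %pl, align 4
  %load1 = load i8, ptr %gep_l1, align 4
  %load2 = load i8, ptr %gep_l2, align 4
  %load3 = load i8, ptr %gep_l3, align 4
  %gep_s1 = getelementptr inbounds i8, ptr %ps, i64 1
  %gep_s2 = getelementptr inbounds i8, ptr %ps, i64 2
  %gep_s3 = getelementptr inbounds i8, ptr %ps, i64 3
  ; The stores consume the loads in reverse order, so the load group is
  ; reversed relative to the store group.
  store i8 %load0, ptr %gep_s3, align 4
  store i8 %load1, ptr %gep_s2, align 4
  store i8 %load2, ptr %gep_s1, align 4
  store i8 %load3, ptr %ps, align 4
  ret void
}
; Expected shape after the forced transform (sketch):
;   %v = call <4 x i8> @llvm.experimental.vp.strided.load.v4i8.p0.i64(
;            ptr align 4 %gep_l3, i64 -1, <4 x i1> splat (i1 true), i32 4)
;   store <4 x i8> %v, ptr %ps, align 4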
Add a test to generate -1 stride load and flags to force this behavior. (0362e9a to d951054)
LLVM Buildbot has detected a new failure on builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/24/builds/12591

Here is the relevant piece of the build log for reference: